# Setup -------------------------------------------------------------------
rm(list = ls())
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
library(tidyverse)
library(estimatr)
## Versions of non-base attached packages
## forcats_0.5.1; stringr_1.4.0; dplyr_1.0.8; purrr_0.3.4; readr_2.1.2; tidyr_1.2.0; tibble_3.1.7;
## ggplot2_3.3.5; tidyverse_1.3.1; estimatr_0.30.2

# Section: The Case of Puerto Rico and Hurricane Maria --------------------

## Figure 1
load(file = "salience.RData")

salience_bar <- ggplot(data = salience,
                       mapping = aes(x = year,
                                     y = value)) +
  geom_bar(stat = 'identity') +
  geom_vline(xintercept = 2017,
             color = "red",
             linetype = "dashed") + 
  facet_wrap(facets = ~ measure,
             scales = "free_y") +
  theme_bw() + 
  ylab("Frequency") + 
  xlab("Year")

ggsave(plot = salience_bar,
       file = "salience_bar.pdf",
       width = 5,
       height = 3,
       units = "in",
       dpi = 600) 

# Section: Survey Experimental Design -------------------------------------
load(file = "puertorico.RData")

## Table 1
table(puertorico$treat_race, puertorico$treat_lang)

## Table 2

load(file = "CCES_Pre_Test.RData")

dplyr::group_by(.data = CCES_Pre_Test,
                light_skin_actor, sex) %>%
  dplyr::summarise(N = n(),
                   .groups = "keep")

dplyr::group_by(.data = CCES_Pre_Test,
                light_skin_actor, age) %>%
  dplyr::summarise(N = n(),
                   .groups = "keep")

dplyr::group_by(.data = CCES_Pre_Test,
                light_skin_actor, race) %>%
  dplyr::summarise(N = n(),
                   .groups = "keep")

## Table 5

load("ANES_2018.RData")
load("CCES_2018.RData")
cbind(round(x = mean(CCES_2018$age), digits = 2),
      round(x = mean(ANES_2018$age), digits = 2))
cbind(round(x = mean(CCES_2018$female), digits = 2),
      round(x = mean(ANES_2018$female), digits = 2))
cbind(round(x = mean(CCES_2018$college_grad), digits = 2),
      round(x = mean(ANES_2018$college_grad), digits = 2))
cbind(round(x = mean(CCES_2018$white), digits = 2),
      round(x = mean(ANES_2018$white), digits = 2))
cbind(round(x = mean(CCES_2018$south), digits = 2),
      round(x = mean(ANES_2018$south), digits = 2))
cbind(round(x = mean(CCES_2018$republican), digits = 2),
      round(x = mean(ANES_2018$republican), digits = 2))
cbind(round(x = mean(CCES_2018$vote_2016), digits = 2),
      round(x = mean(ANES_2018$vote_2016), digits = 2))
cbind(round(x = sum(CCES_2018$trump_2016 == 1)/nrow(CCES_2018), digits = 2),
      round(x = sum(ANES_2018$trump_2016 == 1)/nrow(ANES_2018), digits = 2))
cbind(round(x = mean(CCES_2018$fam_income), digits = 2),
      round(x = mean(ANES_2018$fam_income), digits = 2))

# Section: Marginal Effects of Skin Color and Language --------------------

## Function to calculate marginal effects
marginal_eff <- function(treatment_name, outcome_name, resid_covs_names, data, Omega, digits = 3, residualize = TRUE){
  
  treatment = data[ , treatment_name]
  outcome = data[ , outcome_name]
  resid_outcome = resid(lm(formula = reformulate(termlabels = resid_covs_names,
                                                 response = outcome_name),
                           data = data))
  
  n_treated = sum(treatment == 1)
  n_control = sum(treatment == 0)
  
  est_ATE = mean(outcome[treatment == 1]) - mean(outcome[treatment == 0])
  est_SE = sqrt((var(outcome[treatment == 1]) / n_treated) + (var(outcome[treatment == 0]) / n_control))
  control_group_mean = mean(outcome[treatment == 0])
  control_group_SD = sqrt(mean((outcome[treatment == 0] - mean(outcome[treatment == 0]))^2))
  
  if(residualize == TRUE){
    
    obs_test_stat = mean(resid_outcome[treatment == 1]) - mean(resid_outcome[treatment == 0])
    null_dist = apply(X = Omega,
                      MARGIN = 2,
                      FUN = function(x) { mean(resid_outcome[x == 1]) -
                          mean(resid_outcome[x == 0])  })
    sharp_null_SE = mean((null_dist - mean(null_dist))^2)
    upper_p_value = mean(null_dist >= obs_test_stat)
    lower_p_value = mean(null_dist <= obs_test_stat)
    two_sided_p_value = min(1, 2 * min(upper_p_value, lower_p_value))
    RI_SE = sqrt(mean((null_dist - mean(null_dist))^2))
    
  } else {
    obs_test_stat = mean(outcome[treatment == 1]) - mean(outcome[treatment == 0])
    null_dist = apply(X = Omega,
                      MARGIN = 2,
                      FUN = function(x) { mean(outcome[x == 1]) -
                          mean(outcome[x == 0])  })
    sharp_null_SE = mean((null_dist - mean(null_dist))^2)
    upper_p_value = mean(null_dist >= obs_test_stat)
    lower_p_value = mean(null_dist <= obs_test_stat)
    two_sided_p_value = min(1, 2 * min(upper_p_value, lower_p_value))
    RI_SE = sqrt(mean((null_dist - mean(null_dist))^2))
    
  }
  
  results = list("est_ATE" = round(x = est_ATE, digits = digits),
                 "est_SE" = round(x = est_SE, digits = digits),
                 "control_group_mean" = round(x = control_group_mean, digits = digits),
                 "control_group_SD" = round(x = control_group_SD, digits = digits),
                 "obs_test_stat" = round(x = obs_test_stat, digits = digits),
                 "sharp_null_SE" = round(x = sharp_null_SE, digits = digits),
                 "upper_p_value" = round(x = upper_p_value, digits = digits),
                 "lower_p_value" = round(x = lower_p_value, digits = digits),
                 "two_sided_p_value" = round(x = two_sided_p_value, digits = digits))
  
  return(results)
  
}

## Covariates for residualization in Equation 1
resid_covs <- c("age", "female", "edu",
                "white", "south", "republican",
                "trump", "income","citizenship_pr",
                "language_pr_bin", "republican_mis", "trump_mis",
                "income_mis", "citizenship_pr_mis", "language_pr_mis")

## Table 6: Effects of Skin Tone on Attitudes toward Puerto Rico

## Create empty table for results
marg_eff_stats <- c("est_ATE", "est_SE", "control_group_mean", "control_group_SD",
                    "obs_test_stat", "sharp_null_SE", "upper_p_value", "lower_p_value", "two_sided_p_value")
outcomes <- c("federal_aid", "statehood", "puerto_rican_vote", "approval_trump")

## Create permutation matrix of treatment assignments
set.seed(112417)
race_Omega <- replicate(n = 10^3, expr = sample(puertorico$treat_race))

## Marginal effects of race
marginal_effs_race <- matrix(data = NA,
                             nrow = length(marg_eff_stats),
                             ncol = length(outcomes),
                             dimnames = list(marg_eff_stats, outcomes))

for(i in outcomes){
  
  marginal_effs_race[,i] = unlist(marginal_eff(treatment_name = "treat_race",
                                               outcome_name = i,
                                               resid_covs_names = resid_covs,
                                               data = puertorico,
                                               Omega = race_Omega,
                                               digits = 3))
  
}

## Table 7: Effects of Language on Attitudes toward Puerto Rico

## Create permutation matrix of treatment assignments
set.seed(112417)
lang_Omega <- replicate(n = 10^3, expr = sample(puertorico$treat_lang))

## Marginal effects of language
marginal_effs_lang <- matrix(data = NA,
                             nrow = length(marg_eff_stats),
                             ncol = length(outcomes),
                             dimnames = list(marg_eff_stats, outcomes))

for(i in outcomes){
  
  marginal_effs_lang[,i] = unlist(marginal_eff(treatment_name = "treat_lang",
                                               outcome_name = i,
                                               resid_covs_names = resid_covs,
                                               data = puertorico,
                                               Omega = lang_Omega,
                                               digits = 3))
  
}

# Section: Heterogeneous Effects of Skin Color and Language ------------------------

## Function to calculate heterogeneous effects
het_eff <- function(treatment_name, outcome_name, binary_cov_name, resid_covs_names, data, Omega, digits = 3, residualize = TRUE){
  
  treatment = data[ , treatment_name]
  binary_cov = data[ , binary_cov_name]
  outcome = data[ , outcome_name]
  resid_outcome = resid(lm(formula = reformulate(termlabels = resid_covs_names,
                                                 response = outcome_name),
                           data = data))
  
  n_treated = sum(treatment == 1)
  n_control = sum(treatment == 0)
  
  est_CATE_cov_1 = mean(outcome[treatment == 1 & binary_cov == 1]) - mean(outcome[treatment == 0 & binary_cov == 1])
  est_CATE_cov_0 = mean(outcome[treatment == 1 & binary_cov == 0]) - mean(outcome[treatment == 0 & binary_cov == 0])
  est_int_eff = est_CATE_cov_1 - est_CATE_cov_0
  
  
  ## To get standard error of interaction effect estimator
  ## Use expressions from Miratrix, Sekhon and Yu (2013), Theorem 1, pp. 374
  
  ## binary cov == 1
  n_cov_1 = sum(binary_cov == 1)
  ## Estimate betas by numerical simulation, as in Miratrix, Sekhon and Yu (2013), pp. 386
  beta_1_cov_1 = mean(apply(X = Omega,
                            MARGIN = 2,
                            FUN = function(x) { sum(1 - x[binary_cov == 1]) /
                                sum(x[binary_cov == 1]) }))
  beta_0_cov_1 = mean(apply(X = Omega,
                            MARGIN = 2,
                            FUN = function(x) { sum(x[binary_cov == 1]) /
                                sum(1 - x[binary_cov == 1]) }))
  est_var_treat_cov_1 = var(outcome[treatment == 1 & binary_cov == 1])
  est_var_control_cov_1 = var(outcome[treatment == 0 & binary_cov == 1])
  est_var_cov_1 = 1/n_cov_1 * (beta_1_cov_1 * est_var_treat_cov_1 +
                                 beta_0_cov_1 * est_var_control_cov_1 +
                                 (est_var_treat_cov_1 + est_var_control_cov_1))
  
  ## binary cov == 0
  n_cov_0 = sum(binary_cov == 0)
  ## Estimate betas by numerical simulation, as in Miratrix, Sekhon and Yu (2013), pp. 386
  beta_1_cov_0 = mean(apply(X = Omega,
                            MARGIN = 2,
                            FUN = function(x) { sum(1 - x[binary_cov == 0]) /
                                sum(x[binary_cov == 0]) }))
  beta_0_cov_0 = mean(apply(X = Omega,
                            MARGIN = 2,
                            FUN = function(x) { sum(x[binary_cov == 0]) /
                                sum(1 - x[binary_cov == 0]) }))
  est_var_treat_cov_0 = var(outcome[treatment == 1 & binary_cov == 0])
  est_var_control_cov_0 = var(outcome[treatment == 0 & binary_cov == 0])
  est_var_cov_0 = 1/n_cov_0 * (beta_1_cov_0 * est_var_treat_cov_0 +
                                 beta_0_cov_0 * est_var_control_cov_0 +
                                 (est_var_treat_cov_0 + est_var_control_cov_0))
  
  est_int_eff_SE = sqrt(est_var_cov_1 + est_var_cov_0)
  
  prop_control_cov_1 = sum((outcome[treatment == 0 & binary_cov == 1])) / sum((outcome[treatment == 0]))
  prop_control_cov_0 = sum((outcome[treatment == 0 & binary_cov == 0])) / sum((outcome[treatment == 0]))
  control_group_mean = mean(outcome[treatment == 0 & binary_cov == 1]) * prop_control_cov_1 +
    mean(outcome[treatment == 0 & binary_cov == 0]) * prop_control_cov_0
  
  var_control_cov_1 = mean((outcome[treatment == 0 & binary_cov == 1] - mean(outcome[treatment == 0 & binary_cov == 1]))^2)
  var_control_cov_0 = mean((outcome[treatment == 0 & binary_cov == 0] - mean(outcome[treatment == 0 & binary_cov == 0]))^2)
  control_group_SD = sqrt(prop_control_cov_1 * var_control_cov_1 + prop_control_cov_0 * var_control_cov_0)
  
  if(residualize == TRUE){
    
    CATE_cov_1_obs_test_stat = mean(resid_outcome[treatment == 1 & binary_cov == 1]) - mean(resid_outcome[treatment == 0 & binary_cov == 1])
    CATE_cov_0_obs_test_stat = mean(resid_outcome[treatment == 1 & binary_cov == 0]) - mean(resid_outcome[treatment == 0 & binary_cov == 0])
    int_eff_obs_test_stat = CATE_cov_1_obs_test_stat - CATE_cov_0_obs_test_stat
    
    null_dist = apply(X = Omega,
                      MARGIN = 2,
                      FUN = function(x) { (mean(resid_outcome[x == 1 & binary_cov == 1]) - mean(resid_outcome[x == 0 & binary_cov == 1])) -
                          (mean(resid_outcome[x == 1 & binary_cov == 0]) - mean(resid_outcome[x == 0 & binary_cov == 0]))  })
    sharp_null_SE = mean((null_dist - mean(null_dist))^2)
    upper_p_value = mean(null_dist >= int_eff_obs_test_stat)
    lower_p_value = mean(null_dist <= int_eff_obs_test_stat)
    two_sided_p_value = min(1, 2 * min(upper_p_value, lower_p_value))
    RI_SE = sqrt(mean((null_dist - mean(null_dist))^2))
    
  } else{
    
    CATE_cov_1_obs_test_stat = mean(outcome[treatment == 1 & binary_cov == 1]) - mean(outcome[treatment == 0 & binary_cov == 1])
    CATE_cov_0_obs_test_stat = mean(outcome[treatment == 1 & binary_cov == 0]) - mean(outcome[treatment == 0 & binary_cov == 0])
    int_eff_obs_test_stat = CATE_cov_1_obs_test_stat - CATE_cov_0_obs_test_stat
    
    null_dist = apply(X = Omega,
                      MARGIN = 2,
                      FUN = function(x) { (mean(outcome[x == 1 & binary_cov == 1]) - mean(outcome[x == 0 & binary_cov == 1])) -
                          (mean(outcome[x == 1 & binary_cov == 0]) - mean(outcome[x == 0 & binary_cov == 0]))  })
    sharp_null_SE = mean((null_dist - mean(null_dist))^2)
    upper_p_value = mean(null_dist >= int_eff_obs_test_stat)
    lower_p_value = mean(null_dist <= int_eff_obs_test_stat)
    two_sided_p_value = min(1, 2 * min(upper_p_value, lower_p_value))
    RI_SE = sqrt(mean((null_dist - mean(null_dist))^2))
    
  }
  
  results = list("est_int_eff" = round(x = est_int_eff, digits = digits),
                 "est_SE" = round(x = est_int_eff_SE, digits = digits),
                 "est_CATE_cov_0" = round(x = est_CATE_cov_0, digits = digits),
                 "int_eff_obs_test_stat" = round(x = int_eff_obs_test_stat, digits = digits),
                 "CATE_cov_0_obs_test_stat" = round(x = CATE_cov_0_obs_test_stat, digits = digits),
                 "sharp_null_SE" = round(x = sharp_null_SE, digits = digits),
                 "upper_p_value" = round(x = upper_p_value, digits = digits),
                 "lower_p_value" = round(x = lower_p_value, digits = digits),
                 "two_sided_p_value" = round(x = two_sided_p_value, digits = digits))
  
  return(results)
  
}

int_eff_covs <- c("language_pr_bin", "citizenship_pr", "republican", "white")

## Table 8: Heterogeneous Effects of Skin Tone

het_eff_stats <- c("est_int_eff", "est_SE", "est_CATE_cov_0", "int_eff_obs_test_stat",
                   "CATE_cov_0_obs_test_stat", "sharp_null_SE", "upper_p_value", "lower_p_value", "two_sided_p_value")

het_effs_race <- rep(x = list(matrix(data = NA,
                                     nrow = length(het_eff_stats),
                                     ncol = length(outcomes),
                                     dimnames = list(het_eff_stats, outcomes))),
                     times = length(int_eff_covs))
names(het_effs_race) <- int_eff_covs

for(j in int_eff_covs){
  
  for(i in outcomes){
    
    het_effs_race[[j]][,i] = unlist(het_eff(treatment_name = "treat_race",
                                            outcome_name = i,
                                            binary_cov_name = j,
                                            resid_covs_names = resid_covs,
                                            data = puertorico,
                                            Omega = race_Omega,
                                            digits = 3))
  }
  
}

## Estimated effect of race on support for federal aid among Republicans
coef(lm(formula = federal_aid ~ treat_race,
        data = puertorico,
        subset = republican == 1))[["treat_race"]]
## Estimated effect of race on support for federal aid among Non-Republicans
coef(lm(formula = federal_aid ~ treat_race,
        data = puertorico,
        subset = republican == 0))[["treat_race"]]

## Table 9: Heterogeneous Effects of Language

het_effs_lang = rep(x = list(matrix(data = NA,
                                    nrow = length(het_eff_stats),
                                    ncol = length(outcomes),
                                    dimnames = list(het_eff_stats, outcomes))),
                    times = length(int_eff_covs))
names(het_effs_lang) <- int_eff_covs

for(j in int_eff_covs){
  
  for(i in outcomes){
    
    het_effs_lang[[j]][,i] = unlist(het_eff(treatment_name = "treat_lang",
                                            outcome_name = i,
                                            binary_cov_name = j,
                                            resid_covs_names = resid_covs,
                                            data = puertorico,
                                            Omega = lang_Omega,
                                            digits = 3))
  }
  
}


# Appendix A: Descriptive Statistics --------------------------------------

## Table 1
outcome_means_by_know_PR <- dplyr::group_by(.data = dplyr::filter(.data = puertorico,
                                                                        citizenship_pr_mis == 0 & republican_mis == 0),
                                                  citizenship_pr) %>%
  dplyr::summarise(N = n(),
                   federal_aid_mean = round(x = mean(federal_aid), digits = 3),
                   statehood_mean = round(x = mean(statehood), digits = 3),
                   puerto_rican_vote_mean = round(x = mean(puerto_rican_vote), digits = 3),
                   approval_trump_mean = round(x = mean(approval_trump), digits = 3),
                   .groups = "keep")

outcome_means_by_party_know_PR <- dplyr::group_by(.data = dplyr::filter(.data = puertorico,
                                                                        citizenship_pr_mis == 0 & republican_mis == 0),
                                                  citizenship_pr,
                                                  republican) %>%
  dplyr::summarise(N = n(),
                   federal_aid_mean = round(x = mean(federal_aid), digits = 3),
                   statehood_mean = round(x = mean(statehood), digits = 3),
                   puerto_rican_vote_mean = round(x = mean(puerto_rican_vote), digits = 3),
                   approval_trump_mean = round(x = mean(approval_trump), digits = 3),
                   .groups = "keep")


## Table 2
outcome_means_by_treat <- dplyr::group_by(.data = puertorico,
                                          treat_race,
                                          treat_lang) %>%
  dplyr::summarise(N = n(),
                   federal_aid_mean = round(x = mean(federal_aid), digits = 3),
                   statehood_mean = round(x = mean(statehood), digits = 3),
                   puerto_rican_vote_mean = round(x = mean(puerto_rican_vote), digits = 3),
                   approval_trump_mean = round(x = mean(approval_trump), digits = 3),
                   .groups = "keep")


## Table 3
outcome_means_by_party <- dplyr::group_by(.data = dplyr::filter(.data = puertorico,
                                                                republican_mis == 0),
                                          treat_race, treat_lang, republican) %>%
  dplyr::summarise(N = n(),
                   federal_aid_mean = round(x = mean(federal_aid), digits = 3),
                   statehood_mean = round(x = mean(statehood), digits = 3),
                   puerto_rican_vote_mean = round(x = mean(puerto_rican_vote), digits = 3),
                   approval_trump_mean = round(x = mean(approval_trump), digits = 3),
                   .groups = "keep")
outcome_means_by_race <- dplyr::group_by(.data = dplyr::filter(.data = puertorico,
                                                               !is.na(white)),
                                         treat_race, treat_lang, white) %>%
  dplyr::summarise(N = n(),
                   federal_aid_mean = round(x = mean(federal_aid), digits = 3),
                   statehood_mean = round(x = mean(statehood), digits = 3),
                   puerto_rican_vote_mean = round(x = mean(puerto_rican_vote), digits = 3),
                   approval_trump_mean = round(x = mean(approval_trump), digits = 3),
                   .groups = "keep")
outcome_means_by_know_cit <- dplyr::group_by(.data = dplyr::filter(.data = puertorico,
                                                                   citizenship_pr_mis == 0),
                                             treat_race, treat_lang, citizenship_pr) %>%
  dplyr::summarise(N = n(),
                   federal_aid_mean = round(x = mean(federal_aid), digits = 3),
                   statehood_mean = round(x = mean(statehood), digits = 3),
                   puerto_rican_vote_mean = round(x = mean(puerto_rican_vote), digits = 3),
                   approval_trump_mean = round(x = mean(approval_trump), digits = 3),
                   .groups = "keep")

# Appendix B: Full Interaction Model for Heterogeneous Effects ------------

federal_aid_lang_treat <- estimatr::lm_lin(formula = federal_aid ~ treat_lang,
                                           covariates = ~ language_pr_bin + citizenship_pr + republican,
                                           data = puertorico)
round(x = federal_aid_lang_treat$coefficients, digits = 3)
round(x = federal_aid_lang_treat$std.error, digits = 3)
federal_aid_lang_treat$p.value
federal_aid_lang_treat$nobs
federal_aid_race_treat <- estimatr::lm_lin(formula = federal_aid ~ treat_race,
                                           covariates = ~ language_pr_bin + citizenship_pr + republican,
                                           data = puertorico)
round(x = federal_aid_race_treat$coefficients, digits = 3)
round(x = federal_aid_race_treat$std.error, digits = 3)
federal_aid_race_treat$p.value
federal_aid_race_treat$nobs

statehood_lang_treat <- estimatr::lm_lin(formula = statehood ~ treat_lang,
                                           covariates = ~ language_pr_bin + citizenship_pr + republican,
                                           data = puertorico)
round(x = statehood_lang_treat$coefficients, digits = 3)
round(x = statehood_lang_treat$std.error, digits = 3)
statehood_lang_treat$p.value
statehood_lang_treat$nobs
statehood_race_treat <- estimatr::lm_lin(formula = statehood ~ treat_race,
                                           covariates = ~ language_pr_bin + citizenship_pr + republican,
                                           data = puertorico)
round(x = statehood_race_treat$coefficients, digits = 3)
round(x = statehood_race_treat$std.error, digits = 3)
statehood_race_treat$p.value
statehood_race_treat$nobs

puerto_rican_vote_lang_treat <- estimatr::lm_lin(formula = puerto_rican_vote ~ treat_lang,
                                           covariates = ~ language_pr_bin + citizenship_pr + republican,
                                           data = puertorico)
round(x = puerto_rican_vote_lang_treat$coefficients, digits = 3)
round(x = puerto_rican_vote_lang_treat$std.error, digits = 3)
puerto_rican_vote_lang_treat$p.value
puerto_rican_vote_lang_treat$nobs
puerto_rican_vote_race_treat <- estimatr::lm_lin(formula = puerto_rican_vote ~ treat_race,
                                           covariates = ~ language_pr_bin + citizenship_pr + republican,
                                           data = puertorico)
round(x = puerto_rican_vote_race_treat$coefficients, digits = 3)
round(x = puerto_rican_vote_race_treat$std.error, digits = 3)
puerto_rican_vote_race_treat$p.value
puerto_rican_vote_race_treat$nobs

approval_trump_lang_treat <- estimatr::lm_lin(formula = approval_trump ~ treat_lang,
                                           covariates = ~ language_pr_bin + citizenship_pr + republican,
                                           data = puertorico)
round(x = approval_trump_lang_treat$coefficients, digits = 3)
round(x = approval_trump_lang_treat$std.error, digits = 3)
approval_trump_lang_treat$p.value
approval_trump_lang_treat$nobs
approval_trump_race_treat <- estimatr::lm_lin(formula = approval_trump ~ treat_race,
                                           covariates = ~ language_pr_bin + citizenship_pr + republican,
                                           data = puertorico)
round(x = approval_trump_race_treat$coefficients, digits = 3)
round(x = approval_trump_race_treat$std.error, digits = 3)
approval_trump_race_treat$p.value
approval_trump_race_treat$nobs



